# Coppock, Alexander, Alan S. Gerber, Donald P. Green, and Holger L. Kern. 
# Combining Double Sampling and Bounds to Address Non-Ignorable Missing Outcomes in Randomized Experiments. 
# Political Analysis (in press)
# Simulation analysis. Main text Figure 1 and Appendix Figure 1

rm(list=ls())

# Set your working directory to the replication archive
# setwd() 

library(dplyr)
library(ggplot2)
library(reshape2)
# Uncomment to install latest version
# install.packages("devtools")
# devtools::install_github("acoppock/attrition")

# OR

# Uncomment to install source
# install.packages("attrition_0.0.0.9000.tar.gz", repos = NULL, type="source")
library(attrition)

# These stay constant over the simulations
y1m_c <- 0.50 # mean(Y[R1==1 & Z==0])
y1m_t <- 0.50 # mean(Y[R1==1 & Z==1])
y2m_nm_c <- 0.50 # mean(Y[R2==1 & Z==0])
y2m_nm_t <- 0.50 # mean(Y[R2==1 & Z==1])

s1_c <- .5 # sd(Y[R1==1 & Z==0])
s1_t <- .5 # sd(Y[R1==1 & Z==1])
s2_nm_c <- .5 # sd(Y[R2==1 & Z==0])
s2_nm_t <- .5 # sd(Y[R2==1 & Z==1])

# These vary
n1s_vec <- c(400, 40000)
n2s_vec <- c(50, 5000)
p1_vector <- c(0.25, 0.50, 0.75)
p2_vector <- seq(0, 1, length.out = 1000)
res_mat <- NULL

for(k in seq_along(n1s_vec)){
  for(j in seq_along(p1_vector)){
    for(i in seq_along(p2_vector)){
      
      n1_c <- n1_t <- n1s_vec[k]
      n2_c <- n2_t <- n2s_vec[k]
      p1_c <- p1_t <- p1_vector[j]  
      p2_c <- p2_t <- p2_vector[i]
      
      n1_c_s <- n1_c * p1_c
      n1_t_s <- n1_t * p1_t 
      
      p1_c <- n1_c_s/n1_c
      p1_t <- n1_t_s/n1_t
      
      cis_out <- 
        attrition:::ds_manski_cis_2s(
          n1_t=n1_t,n2_t=n2_t,
          n1_c=n1_c,n2_c=n2_c,
          p1_t=p1_t,p2_t=p2_t,
          s1_t=s1_t,s1_c=s1_c,
          s2_nm_t=s2_nm_t,
          s2_nm_c=s2_nm_c,
          y1m_t=y1m_t,y1m_c=y1m_c,
          y2m_nm_t=y2m_nm_t,
          y2m_nm_c=y2m_nm_c,
          c1a_t=c1a_t,c1r_t=c1r_t,c2a_t=c2a_t,c2r_t=c2r_t,
          p1_c=p1_c,p2_c=p2_c,
          minY=0,maxY=1,alpha=0.05)
      
      res_mat <- rbind(res_mat, c(cis_out, p2= p2_c, p1 = p1_c, n1 = n1_c, n2 = n2_c))
    }
  }
}


# Plot Intervals ----------------------------------------------------------

df <- 
  data.frame(res_mat) %>% 
  melt(id.vars = c("p1", "p2", "n1", "n2")) %>%
  filter(!variable %in% c("low_var", "upp_var", "p1")) %>%
  mutate(type = ifelse(variable %in% c("ci_upper", "ci_lower"), 
                       "95% Confidence Interval", "Identification Region"),
         n_label = paste0("N[1] == ", 2*n1))

label_df <- 
  data.frame(p2 = c(0.05, 0.0375, 0.05),
             value = c(-.65, -.4, -.15),
             p1 = p1_vector,
             label = paste0("p[1] == ", c("0.25", "0.50", "0.75")))

g1 <-
  ggplot(filter(df, n1 == 400), aes(x= p2, y = value, group = interaction(variable, p1), 
                                    linetype = type, color = as.factor(p1))) + 
  geom_line() +
  geom_text(data = label_df, aes(label = label, group=NULL, linetype=NULL), parse = TRUE) + 
  coord_cartesian(xlim = c(0,1), ylim = c(-1, 1)) + 
  scale_linetype_manual(values = c("dashed", "solid")) + 
  scale_color_grey(start = .4, end = 0, guide=FALSE) + 
  xlab(expression(paste("Follow-up Sample Response Rate (", p[2],")"))) + 
  ylab("Identification Region and Expected 95% Confidence Interval") + 
  theme_bw() +
  theme(legend.position = "bottom",
        legend.key.width = unit(3, "lines"),
        legend.title = element_blank()) +
  ggtitle(expression(paste("Intervals for ATE vs ", p[2]))) 

g3 <- g1 %+% df +
  facet_wrap(~n_label, nrow = 2, labeller = label_parsed) +
  theme(strip.background = element_blank())


# Plot widths -------------------------------------------------------------

df_2_cis <- 
  data.frame(res_mat) %>%
  mutate(width = ci_upper - ci_lower,
         type = "95% Confidence Interval",
         p1 = as.factor(p1))

df_2_bounds <- 
  data.frame(res_mat) %>%
  mutate(width = upp_est - low_est,
         type = "Identification Region",
         p1 = as.factor(p1))

df2 <- bind_rows(df_2_cis, df_2_bounds) %>%
  mutate(n_label = paste0("N[1] == ", 2*n1))

label_df <- 
  data.frame(p2 = c(0.05, 0.0375, 0.05),
             width = c(1.60, 1.1, 0.6),
             p1 = p1_vector,
             label = paste0("p[1] == ", p1_vector))

g2 <- 
  ggplot(filter(df2, n1 == 400), aes(x = p2, y = width,linetype = type, color = as.factor(p1))) +
  geom_line() +
  scale_color_grey(start = .4, end = 0, guide=FALSE) + 
  scale_linetype_manual(values = c("dashed", "solid")) + 
  coord_cartesian(xlim = c(0,1), ylim = c(0,2)) +
  theme_bw() +
  geom_text(data = label_df, aes(x = p2, y = width, label = label, linetype=NULL), parse = TRUE, nudge_y = .1) +
  xlab(expression(paste("Follow-up Sample Response Rate (", p[2],")"))) + 
  ylab("Width of Identification Region and Expected 95% Confidence Interval") + 
  theme(legend.position = "bottom",
        legend.key.width = unit(3, "lines"),
        legend.title = element_blank()) +
  ggtitle(expression(paste("Width of Intervals for ATE vs ", p[2])))


g4 <- g2 %+% df2 +
  facet_wrap(~n_label, nrow = 2, labeller = label_parsed) +
  theme(strip.background = element_blank())



multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  require(grid)
  
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots==1) {
    print(plots[[1]])
    
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

# Main text Figure 1
multiplot(g1, g2, cols =2)

# Appendix Figure 1
multiplot(g3, g4, cols =2)




